check that because you zone for one place in em school, doesn’t mean all people who zoned to that em school will zone to same mid school
We took one zone and proved that you cannot make that assumption
zoneData %>% filter(zoned_elm_dbn == "01M015") %>% group_by(zoned_mid_dbn) %>% summarise(count = n())
## # A tibble: 3 x 2
## zoned_mid_dbn count
## <chr> <int>
## 1 01M972 155
## 2 01M973 339
## 3 <NA> 3
count_df <- zoneData %>% group_by(year, student_id_scram) %>% summarise(count = n()) %>%
group_by(count)%>% summarise()
count_df
## # A tibble: 1 x 1
## count
## <int>
## 1 1
see na’s in the data
zoneData %>% filter(is.na(zoned_elm_dbn))
## # A tibble: 152,198 x 12
## student_id_scram res_zip_cde zoned_elm_dbn zoned_mid_dbn zoned_hs_dbn
## <int> <chr> <chr> <chr> <chr>
## 1 411702894 11233 <NA> <NA> <NA>
## 2 981802723 10027 <NA> <NA> <NA>
## 3 288472869 11233 <NA> <NA> <NA>
## 4 347572670 11233 <NA> <NA> <NA>
## 5 166902009 11235 <NA> <NA> <NA>
## 6 239802916 10009 <NA> <NA> <NA>
## 7 98612290 10002 <NA> <NA> <NA>
## 8 41602964 10002 <NA> <NA> <NA>
## 9 890612332 10002 <NA> <NA> <NA>
## 10 673802946 10027 <NA> <NA> <NA>
## # ... with 152,188 more rows, and 7 more variables: census_block <chr>,
## # census_tract <chr>, year <dbl>, res_boro <chr>, res_district <int>,
## # audit_dte <int>, last_change_dte <int>
zoneData %>% group_by(zoned_elm_dbn) %>% tally
## # A tibble: 949 x 2
## zoned_elm_dbn n
## <chr> <int>
## 1 00678 1
## 2 00767 1
## 3 00921 1
## 4 00961 1
## 5 00976 1
## 6 01M015 497
## 7 01M019 615
## 8 01M020 589
## 9 01M034 569
## 10 01M063 704
## # ... with 939 more rows
Use census tracts to group students by area
tracts <- zoneData %>% group_by(census_tract) %>% summarise(numStudents =n())
tracts%>% head()
## # A tibble: 6 x 2
## census_tract numStudents
## <chr> <int>
## 1 000100 332
## 2 000200 1493
## 3 000201 516
## 4 000202 891
## 5 000300 220
## 6 000301 131
tracts_map <- merge(nyc_tracts, tracts, by.x = "TRACTCE", by.y ="census_tract")
#plot(nyc_tracts)
leaflet(nyc_tracts) %>%
addTiles() %>%
addPolygons(popup = ~paste("Tract:", TRACTCE)) %>%
addProviderTiles("CartoDB.Positron") %>%
setView(-73.98, 40.75, zoom = 13)
leaflet(tracts_map) %>%
addTiles() %>%
addPolygons(popup = ~paste("Num students:", numStudents)) %>%
addProviderTiles("CartoDB.Positron")%>%
setView(-73.98, 40.75, zoom = 13)
number of each zips in our dataset
zoneData%>%
group_by(res_zip_cde) %>%
summarise(count = n())
## # A tibble: 10,583 x 2
## res_zip_cde count
## <chr> <int>
## 1 0 116
## 2 00000 129
## 3 00002 1
## 4 00018 1
## 5 00048 1
## 6 00051 1
## 7 00064 1
## 8 00072 1
## 9 00104 1
## 10 00110 1
## # ... with 10,573 more rows
#r <- GET("http://www2.census.gov/geo/tiger/TIGER2010DP1/County_2010Census_DP1.zip")
#r<- GET("https://data.cityofnewyork.us/download/hkaz-iizd/application%2Fzip")
#r<- GET("https://data.cityofnewyork.us/api/geospatial/mshx-yvwq?method=export&format=GeoJSON")
#dbns <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
Get elementary data from NYC open data
#elementary data only
r<- GET("https://data.cityofnewyork.us/api/geospatial/cq6p-iwiy?method=export&format=GeoJSON")
dbns <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
## Warning in readOGR(content(r, "text"), "OGRGeoJSON", verbose = F): Dropping
## null geometries: 752
#summary(dbns)
#dbns@data$dbn
zones_df <- tidy(dbns)
## Regions defined for each Polygons
#ggplot() +
# geom_polygon(data=zones_df, aes(x=long, y=lat, group=group), alpha = .25, colour = "black", fill = NA)
nyc_map <- get_map(location = c(lon = -74.00, lat = 40.71), maptype = "terrain", zoom = 11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=40.71,-74&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
#ggmap(nyc_map) +
# geom_polygon(data=zones_df, aes(x=long, y=lat, group=group), alpha = .25, colour = "black", fill = NA)
leaflet(dbns) %>%
addTiles() %>%
addPolygons(popup = ~paste("School DBN:", dbn))%>%
addProviderTiles("CartoDB.Positron")%>%
setView(-73.98, 40.75, zoom = 13)
#zone_dbn <- zoneData %>% group_by(zoned_elm_dbn) %>% summarise(count = n())
zone_dbn <- zoneData %>% filter(year ==2015) %>% group_by(zoned_elm_dbn) %>% summarise(count = n())
#remove na counts
zone_dbn <- zone_dbn %>% filter(!is.na(count))
map_data <- merge(dbns, zone_dbn, by.x = "dbn", by.y ="zoned_elm_dbn")
pal <- colorNumeric(palette = "RdBu",
domain = range(map_data@data$count, na.rm=T))
leaflet(map_data) %>%
addTiles() %>%
addPolygons(fillColor = ~pal(count), popup = ~paste("Num students in 2015:", as.character(count)))%>%
addProviderTiles("CartoDB.Positron") %>%
setView(-73.98, 40.75, zoom = 13)
#Use zip Codes to map data ## This is across ALL YEARS!
#filter out na zips
zone_zip <-zoneData %>%
mutate(res_zip_cde = as.integer(res_zip_cde)) %>%
filter(!is.na(res_zip_cde)) %>%
select(student_id_scram, res_zip_cde, contains("zoned")) %>%
group_by(res_zip_cde) %>%
summarise(numStudents = n())
## Warning in evalq(as.integer(res_zip_cde), <environment>): NAs introduced by
## coercion
testZip <- GET("http://catalog.civicdashboards.com/dataset/11fd957a-8885-42ef-aa49-5c879ec93fac/resource/28377e88-8a50-428f-807c-40ba1f09159b/download/nyc-zip-code-tabulation-areas-polygons.geojson")
zips <- readOGR(content(testZip,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
#summary(zips)
#zips_df <- tidy(zips)
map_data_zip <- merge(zips, zone_zip, by.y = "res_zip_cde", by.x ="postalCode")
leaflet(map_data_zip) %>%
addTiles() %>%
addPolygons(popup = ~paste("Number Students", as.character(numStudents)))%>%
addProviderTiles("CartoDB.Positron") %>%
setView(-73.98, 40.75, zoom = 13)
nyc_map <- get_map(location = c(lon = -74.00, lat = 40.71), maptype = "terrain", zoom = 11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=40.71,-74&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
#dbnBioData <- bios %>%
# select(student_id_scram, dbn, grade_level, year)
# dbnBioData <- rename(dbnBioData, BioDbn = dbn)
#merged <- merge(dbnBioData, zoneData, by = "student_id_scram" )
write.table(tracts, file = "tracts.txt", sep = "\t" , col.names = TRUE)